UFO Sigthings Near Area51- Visualization

Author

Mrudula Nimmala, Sowmya Sree Kemsaram, Shreyas Shivaji Mali

Loading the necessary libraries

library(lubridate)
library(dplyr)
library(geosphere)
library(leaflet)
library(dbscan)
library(MASS)
library(ggplot2)
library(plotly)

Loading the Data set and perform pre-processing of data

# Loading the dataset
file_path <- "ufo-sightings-transformed.csv"
ufo_dataset <- read.csv(file_path)


# Overview of the dataset
head(ufo_dataset)
  X           Date_time date_documented Year Month Hour Season Country_Code
1 0 1949-10-10 20:30:00       4/27/2004 1949    10   20 Autumn          USA
2 1 1949-10-10 21:00:00      12/16/2005 1949    10   21 Autumn          USA
3 2 1955-10-10 17:00:00       1/21/2008 1955    10   17 Autumn          GBR
4 3 1956-10-10 21:00:00       1/17/2004 1956    10   21 Autumn          USA
5 4 1960-10-10 20:00:00       1/22/2004 1960    10   20 Autumn          USA
6 5 1961-10-10 19:00:00       4/27/2007 1961    10   19 Autumn          USA
         Country    Region       Locale latitude   longitude UFO_shape
1  United States     Texas   San Marcos 29.88306  -97.941111  Cylinder
2  United States     Texas Bexar County 29.38421  -98.581082     Light
3 United Kingdom   England      Chester 53.20000   -2.916667    Circle
4  United States     Texas         Edna 28.97833  -96.645833    Circle
5  United States    Hawaii      Kaneohe 21.41806 -157.803611     Light
6  United States Tennessee      Bristol 36.59500  -82.188889    Sphere
  length_of_encounter_seconds Encounter_Duration
1                        2700         45 minutes
2                        7200            1-2 hrs
3                          20         20 seconds
4                          20           1/2 hour
5                         900         15 minutes
6                         300          5 minutes
                                                                                                                                                 Description
1                    This event took place in early fall around 1949-50. It occurred after a Boy Scout meeting in the Baptist Church. The Baptist Church sit
2                                                            1949 Lackland AFB&#44 TX.  Lights racing across the sky &amp; making 90 degree turns on a dime.
3                                                                                                        Green/Orange circular disc over Chester&#44 England
4                 My older brother and twin sister were leaving the only Edna theater at about 9 PM&#44...we had our bikes and I took a different route home
5 AS a Marine 1st Lt. flying an FJ4B fighter/attack aircraft on a solo night exercise&#44 I was at 50&#44000&#39 in a &quot;clean&quot; aircraft (no ordinan
6                 My father is now 89 my brother 52 the girl with us now 51 myself 49 and the other fellow which worked with my father if he&#39s still livi
summary(ufo_dataset)
       X          Date_time         date_documented         Year     
 Min.   :    0   Length:80328       Length:80328       Min.   :1906  
 1st Qu.:20082   Class :character   Class :character   1st Qu.:2001  
 Median :40164   Mode  :character   Mode  :character   Median :2006  
 Mean   :40164                                         Mean   :2004  
 3rd Qu.:60245                                         3rd Qu.:2011  
 Max.   :80327                                         Max.   :2014  
     Month             Hour          Season          Country_Code      
 Min.   : 1.000   Min.   : 0.00   Length:80328       Length:80328      
 1st Qu.: 4.000   1st Qu.:10.00   Class :character   Class :character  
 Median : 7.000   Median :19.00   Mode  :character   Mode  :character  
 Mean   : 6.835   Mean   :15.53                                        
 3rd Qu.: 9.000   3rd Qu.:21.00                                        
 Max.   :12.000   Max.   :23.00                                        
   Country             Region             Locale             latitude     
 Length:80328       Length:80328       Length:80328       Min.   :-82.86  
 Class :character   Class :character   Class :character   1st Qu.: 34.13  
 Mode  :character   Mode  :character   Mode  :character   Median : 39.41  
                                                          Mean   : 38.12  
                                                          3rd Qu.: 42.79  
                                                          Max.   : 72.70  
   longitude        UFO_shape         length_of_encounter_seconds
 Min.   :-176.66   Length:80328       Min.   :       0           
 1st Qu.:-112.07   Class :character   1st Qu.:      30           
 Median : -87.90   Mode  :character   Median :     180           
 Mean   : -86.77                      Mean   :    9017           
 3rd Qu.: -78.75                      3rd Qu.:     600           
 Max.   : 178.44                      Max.   :97836000           
 Encounter_Duration Description       
 Length:80328       Length:80328      
 Class :character   Class :character  
 Mode  :character   Mode  :character  
                                      
                                      
                                      
# Checking for missing values
colSums(ufo_dataset == "" | is.na(ufo_dataset))
                          X                   Date_time 
                          0                           0 
            date_documented                        Year 
                          0                           0 
                      Month                        Hour 
                          0                           0 
                     Season                Country_Code 
                          0                         259 
                    Country                      Region 
                        259                         566 
                     Locale                    latitude 
                        457                           0 
                  longitude                   UFO_shape 
                          0                        1930 
length_of_encounter_seconds          Encounter_Duration 
                          0                           0 
                Description 
                         15 
# Removing rows with missing or blank values
ufo_dataset <- ufo_dataset[!(apply(ufo_dataset, 1, function(row) any(row == "" | is.na(row)))), ]

# Converting Date_time to datetime
ufo_dataset$Date_time <- ymd_hms(ufo_dataset$Date_time)

# Converting date_documented to date
ufo_dataset$date_documented <- mdy(ufo_dataset$date_documented)



# Converting columns to categorical variables (factors)
ufo_dataset$Season <- as.factor(ufo_dataset$Season)
ufo_dataset$Country_Code <- as.factor(ufo_dataset$Country_Code)
ufo_dataset$Country <- as.factor(ufo_dataset$Country)
ufo_dataset$Region <- as.factor(ufo_dataset$Region)
ufo_dataset$Locale <- as.factor(ufo_dataset$Locale)
ufo_dataset$UFO_shape <- as.factor(ufo_dataset$UFO_shape)

Get Area-51 Coordinates.

# Coordinates for Area 51
area51_coords <- c(-115.808, 37.233)

# Function to calculate distance from Area 51
calculate_distance <- function(lon, lat) {
  # Converting to miles
  dist <- distGeo(c(lon, lat), area51_coords) / 1609.34  
  return(dist)
}

# Adding a distance column to the dataset
data <- ufo_dataset %>%
  mutate(Distance_to_Area51 = mapply(calculate_distance, longitude, latitude))

# Filtering for sightings within 50 miles of Area 51
area51_sightings <- data %>%
  filter(Distance_to_Area51 <= 50)

Pointing the UFO sigthings near Area 51

leaflet(data = area51_sightings) %>%
  addTiles() %>%
  addCircleMarkers(
    lng = ~longitude,
    lat = ~latitude,
    popup = ~paste(
      "Date and Time:", Date_time, "<br>",
      "Season:", Season, "<br>",
      "Region:", Region, "<br>",
      "Locale:", Locale
    ),
    radius = 5,
    color = "red",
    fillOpacity = 0.8
  ) %>%
  setView(lng = -115.808, lat = 37.233, zoom = 8)
library(plotly)
# Defining Area 51 coordinates
area51_coords <- data.frame(longitude = -115.808, latitude = 37.233)

# Adjusting the density plot and annotating Area 51
gg_density <- ggplot(area51_sightings, aes(x = longitude, y = latitude)) +
  geom_density2d() +
  stat_density2d(aes(fill = ..level..), geom = "polygon", alpha = 0.4) +
  geom_point(data = area51_coords, aes(x = longitude, y = latitude), 
             color = "red", size = 3) +  
  annotate("text", x = -115.808, y = 37.233, label = "Area 51", 
           color = "red", size = 5, hjust = 0, vjust = -1) + 
  labs(title = "Density of UFO Sightings Near Area 51",
       x = "Longitude", y = "Latitude") +
  xlim(-116, -115.5) +  
  ylim(36.5, 37.5) +    
  theme_minimal()

# Converting to an interactive plot
interactive_density <- ggplotly(gg_density)

# Displaying the plot
interactive_density

Time-series patterns at AREA 51

Extracting Date and Time From the Dataset

area51_sightings$Year <- format(area51_sightings$Date_time, "%Y")
area51_sightings$Month <- format(area51_sightings$Date_time, "%m")
area51_sightings$Day <- format(area51_sightings$Date_time, "%d")
area51_sightings$Hour <- format(area51_sightings$Date_time, "%H")

Analysing Sightings by Year

# Grouping by year and count sightings
sightings_by_year <- area51_sightings %>%
  group_by(Year) %>%
  summarise(Sightings = n())

# Plotting the trend over years
ggplot(sightings_by_year, aes(x = as.numeric(Year), y = Sightings)) +
  geom_line(color = "blue", size = 1) +
  geom_point(size = 2) +
  labs(title = "UFO Sightings Near Area 51 Over the Years",
       x = "Year", y = "Number of Sightings") +
  theme_minimal()
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

Analyze Sightings by Month

# Grouping by month and count sightings
sightings_by_month <- area51_sightings %>%
  group_by(Month) %>%
  summarise(Sightings = n())

# Plotting the trend over months
ggplot(sightings_by_month, aes(x = Month, y = Sightings)) +
  geom_bar(stat = "identity", fill = "blue", alpha = 0.7) +
  labs(title = "UFO Sightings Near Area 51 by Month",
       x = "Month", y = "Number of Sightings") +
  theme_minimal()

Analyzing Sightings by Hour

# Grouping by hour and count sightings
sightings_by_hour <- area51_sightings %>%
  group_by(Hour) %>%
  summarise(Sightings = n())

# Plotting the trend over hours
ggplot(sightings_by_hour, aes(x = as.numeric(Hour), y = Sightings)) +
  geom_bar(stat = "identity", fill = "blue", alpha = 0.7) +
  labs(title = "UFO Sightings Near Area 51 by Hour",
       x = "Hour of Day", y = "Number of Sightings") +
  theme_minimal()

Adding Time Filters to Spatial Data

# Filtering for July evenings (8 PM–10 PM)
july_evenings <- area51_sightings %>%
  filter(Month == "07" & Hour >= 20 & Hour <= 22)

# Plotting heat map for July evenings
ggplot(july_evenings, aes(x = longitude, y = latitude)) +
  geom_density2d() +
  stat_density2d(aes(fill = ..level..), geom = "polygon", alpha = 0.5) +
  scale_fill_gradient(low = "lightblue", high = "red") +
  labs(title = "UFO Sightings in July Evenings Near Area 51",
       x = "Longitude", y = "Latitude") +
  theme_minimal()

coordinates <- data.frame(
  longitude = july_evenings$longitude,
  latitude = july_evenings$latitude,
  Region = july_evenings$Region,
  Locale = july_evenings$Locale
)

# Computing density
density_estimation <- kde2d(coordinates$longitude, coordinates$latitude, n = 100)

# Converting density grid into a data frame
density_data <- as.data.frame(expand.grid(
  longitude = density_estimation$x,
  latitude = density_estimation$y
))

# Adding density levels
density_data$density <- as.vector(density_estimation$z)

# Step 1: Filter for high-density regions (top 5% density values)
high_density_points <- density_data %>%
  filter(density >= quantile(density, 0.95))

# Step 2: Removing duplicates for latitude and longitude
high_density_unique <- high_density_points %>%
  distinct(longitude, latitude, .keep_all = TRUE)

Ploting UFO Sightings Near Area 51 During July-Evenings

# Perform clustering on latitude and longitude
coordinates <- july_evenings[, c("longitude", "latitude")]
dbscan_result <- dbscan(coordinates, eps = 0.01, minPts = 5)

# Add cluster labels to data
july_evenings$Cluster <- factor(dbscan_result$cluster)

# Plot clusters
ggplot(july_evenings, aes(x = longitude, y = latitude, color = Cluster)) +
  geom_point(size = 2, alpha = 0.8) +
  labs(title = "UFO Sightings Clusters Near Area 51 During July-eveings.",
       x = "Longitude", y = "Latitude", color = "Cluster") +
  theme_minimal()

UFO Sightings Clusters with Connections Near Area 51 During July-Evenings

# Connect points by cluster (lines)
july_evenings_cluster_lines <- plot_ly(
  data = july_evenings,
  x = ~longitude,
  y = ~latitude,
  color = ~Cluster,
  type = 'scatter',
  mode = 'markers+lines',  # Add lines between points
  marker = list(size = 10, opacity = 0.8),
  text = ~paste(
    "Cluster:", Cluster, "<br>",
    "Longitude:", longitude, "<br>",
    "Latitude:", latitude, "<br>",
    "Locale:", Locale
  )
) %>%
  layout(
    title = "UFO Sightings Clusters with Connections Near Area 51 During July-Eveings",
    xaxis = list(title = "Longitude"),
    yaxis = list(title = "Latitude"),
    hovermode = "closest"
  )

# Display the interactive plot
july_evenings_cluster_lines

Performing Clustering at UFO Sightings Near Area 51

# Performing clustering on latitude and longitude
coordinates <- area51_sightings[, c("longitude", "latitude")]
dbscan_result <- dbscan(coordinates, eps = 0.01, minPts = 5)

# Adding cluster labels to data
area51_sightings$Cluster <- factor(dbscan_result$cluster)

# Plotting clusters
ggplot(area51_sightings, aes(x = longitude, y = latitude, color = Cluster)) +
  geom_point(size = 2, alpha = 0.8) +
  labs(title = "UFO Sightings Clusters Near Area 51",
       x = "Longitude", y = "Latitude", color = "Cluster") +
  theme_minimal()

UFO Sightings Clusters with Connections Near Area 51

# Connecting points by cluster (lines)
plotly_cluster_lines <- plot_ly(
  data = area51_sightings,
  x = ~longitude,
  y = ~latitude,
  color = ~Cluster,
  type = 'scatter',
  mode = 'markers+lines',  
  marker = list(size = 10, opacity = 0.8),
  text = ~paste(
    "Cluster:", Cluster, "<br>",
    "Longitude:", longitude, "<br>",
    "Latitude:", latitude
  )
) %>%
  layout(
    title = "UFO Sightings Clusters with Connections Near Area 51",
    xaxis = list(title = "Longitude"),
    yaxis = list(title = "Latitude"),
    hovermode = "closest"
  )

# Displaying the interactive plot
plotly_cluster_lines